/* ******************************************************************************************************* */
/* ************** Constructs datasets containing firm characteristics and SUEs for each year  **************/
/* ******************************************************************************************************* */

/* ********************************************************************************** */
/* Step 7.1: Start from Compustat (temp.cst_annual is firm-fiscal year panel data);   */
/* ********************************************************************************** */
proc sort data=comp.funda (keep=gvkey cusip conm datadate datafmt indfmt popsrc consol fyear fyr sich naicsh fic xad cik act lct epspx emp
  che at dltt lt pstkl pstk sale oibdp dp ib dvc csho dlc txditc cogs xrd pstkrv ceq txdb dcvt lt xsga prcc_f ppent xint txt capx aqc curcd) out=cst nodupkey;
  where at > 0 and sale >= 0 and 2019 >= fyear >= 2001 and indfmt= 'INDL' and datafmt= 'STD' and popsrc= 'D' and consol= 'C' and curcd = 'USD'; 
  * retrieve accounting information as early as 1960 to calculate cash flow volatility over the past 20 years;
  by gvkey fyear;
run;

data cst; set cst;
  if fyr gt 0;
  cyear = fyear;
  if fyr le 5 and fyr gt 0 then cyear = cyear+1;
  fyrenddt = intnx('month', mdy(fyr, 28, cyear), 0, 'end');
  format fyrenddt mmddyyn8.;
  drop indfmt datafmt popsrc consol curcd;

proc sql;
  create table cst_zip as
  select a.*, b.addzip, b.incorp, b.city as city_comp, b.state as state_comp, b.loc as country
  from cst as a left join comp.company as b
  on a.gvkey = b.gvkey;
quit;

data cst_zip; set cst_zip;
  zipcode = substr(addzip, 1, 5)*1;
  zipcode4 = int(zipcode/10);

proc sql;
  create table cst_zip3 as
  select a.*, b.longitude, b.latitude, b.city, b.state
  from cst_zip as a left join comp.zipcode as b
  on a.zipcode = b.zip;
quit;

proc sql;
  create table cstrating as
  select a.*, b.splticrm
  from cst_zip3 as a left join comp.adsprate as b
  on a.gvkey = b.gvkey and intck('month', b.datadate, a.fyrenddt) in (0);
quit;

proc sql;
  create table cst_annual as 
  select a.*, b.lpermno as permno, b.lpermco as permco, b.linktype, b.linkprim, b.liid
  from cstrating as a, crspcc.ccmxpf_linktable as b
  where a.gvkey = b.gvkey and linktype in ("LC", "LN", "LU", "LX", "LD", "LS") and
        usedflag in (1) and (fyrenddt >= linkdt or linkdt = .B) and (fyrenddt <= linkenddt or linkenddt = .E)
  order by gvkey, fyrenddt, lpermno, lpermco, linkdt, linkenddt; 
quit;

proc sort data = cst_annual; by gvkey permco permno fyear linktype descending linkprim liid;

data cst_annual1; set cst_annual; by gvkey permco permno fyear linktype descending linkprim liid;
  if first.fyear;
run;

/* *************************************************************************** */
/* Step 7.2: Merge with CRSP (temp.cst_annual2 has firm-level panel data); */
/* *************************************************************************** */

****************************************************************************************************************
***************************   Construct a firm-meeting date panel for all firms  ******************************* 
****************************************************************************************************************;
proc sort data=crsp.stocknames (keep= permno permco comnam namedt nameenddt shrcd ticker exchcd siccd) out = permnolist nodupkey;
  by permno namedt nameenddt; 
run;

proc sql undo_policy = none;
	create table cst_list as select distinct
	a.*, b.fyrenddt, b.gvkey, b.datadate
	from permnolist a, cst_annual1 b
	where (a.permno = b.permno) and a.namedt <= b.datadate <= a.nameenddt; 

	* add market cap, shares outstanding, number of share class at fyrenddt;
	create table cst_list as select distinct
	a.*, (abs(b.prc)*b.shrout/1000) as mve_crsp_permno, abs(b.prc) as prc_crsp_permno, b.shrout as shrout_permno, b.date as mve_date 
	from cst_list a left join crsp.msf b
	on (a.permno = b.permno) and intck('month', b.date, a.datadate) = 0; 

	* merge with CRSP;
	create table cst_annual1b as select 
	*
	from cst_list as a left join cst_annual1 as b
	on a.fyrenddt = b.fyrenddt and a.permno =  b.permno; 
quit;

proc sort data = cst_list nodupkey; by gvkey datadate permco permno fyrenddt; **** DUAL-class firms show up as two observations in this dataset;

data cst_list; set cst_list; by gvkey datadate permco permno fyrenddt;
  if last.permno; 

proc means data = cst_list noprint; by gvkey datadate permco; 
  var mve_crsp_permno shrout_permno; 
  output out = cst_list2 (drop=_type_ _freq_) sum = mve_crsp shrout;
run;

proc sql;
  create table cst_annual2 as select 
  *
  from cst_annual1b a, cst_list2 b
  where a.gvkey = b.gvkey and a.datadate = b.datadate and a.permco = b.permco; 
quit;



data cst_annual2a; set cst_annual2;
  if permco ne .;
  fyme = prcc_f*csho;
  pct_diff = mve_crsp/fyme - 1;
  fyme2 = fyme;
  if fyme < mve_crsp then fyme2 = mve_crsp;
  ratio = csho*1000/shrout;
  name_dis = min(spedis(conm, comnam), spedis(comnam, conm));

proc sort data = cst_annual2a nodupkey; by gvkey permco datadate permno; run;

proc sql;
  create table cst_annual2a as
  select a.*, b.ceq as ceq_lag, b.at as at_lag, b.sale as sale_lag, b.fyrenddt as fyrenddt_lag, abs(intck('month', b.fyrenddt, a.fyrenddt)-12) as diff
  from cst_annual2a as a left join cst_annual2a as b
  on a.gvkey = b.gvkey and a.permco = b.permco and a.permno = b.permno and a.fyear = b.fyear+1;
run;

proc sort data = cst_annual2a nodupkey; by gvkey permco datadate permno diff; 

data cst_annual2a; set cst_annual2a; by gvkey permco datadate permno diff; 
  if first.permno;
run;



proc sql;
  create table cst_annual2b as
  select a.*, input(a.gvkey, best8.0) as gvkey_num, b.ff48 as ind, b.ff48ind
  from cst_annual2a as a left join root.ff48 as b
  on ff1 <= siccd <= ff2;
quit;

proc sql;
  create table cst_annual2c as
  select a.*, b.start as sp500_start, b.ending as sp500_ending
  from cst_annual2b as a left join crspix.msp500list as b
  on a.permno = b.permno and b.start <= a.datadate <= b.ending;
quit;

* calculate # of business segments and geographic segments;
proc sql;
  create table busseg as
  select a.permco, a.gvkey, a.datadate, a.fyear, b.sid, b.sales as seg_sale
  from cst_annual2c as a, seghist.seg_annfund as b
  where a.gvkey = b.gvkey and a.datadate = b.datadate and b.stype = 'BUSSEG';
quit;


proc sort data = busseg; by permco gvkey datadate; 

proc means data = busseg noprint; by permco gvkey datadate;
  var seg_sale;
  output out = temp_busseg (drop=_type_ _freq_) n = num_busseg sum = total_sale;

data temp; merge busseg temp_busseg; by permco gvkey datadate;
  if total_sale > 0 then frac2 = (seg_sale/total_sale)*(seg_sale/total_sale);

proc means noprint; by permco gvkey datadate;
  var frac2;
  output out = temp_busseg2 (drop=_type_ _freq_) sum = hhi;

proc sql;
  create table geoseg as
  select a.permco, a.gvkey, a.datadate, a.fyear, b.sid, b.sales as seg_sale
  from cst_annual2c as a, seghist.seg_annfund as b
  where a.gvkey = b.gvkey and a.datadate = b.datadate and b.stype = 'GEOSEG';
quit;

proc sort data = geoseg; by permco gvkey datadate; 

proc means data = geoseg noprint; by permco gvkey datadate;
  var seg_sale;
  output out = temp_geoseg (drop=_type_ _freq_) n = num_geoseg;

proc sort data = cst_annual2c; by permco gvkey datadate;

data cst_annual2d; merge cst_annual2c (in=a) temp_busseg temp_geoseg temp_busseg2; by permco gvkey datadate;
  if siccd not in (0, .) and ind = . then ind = 48;
  cash = che/at; 
  if siccd not in (0, .) then do; 
    ind2 = int(siccd/100);
    ind3 = int(siccd/10);
  end;

  if sp500_ending ne . then sp500 = 1; else sp500 = 0;

  if dltt <= .Z then dltt = 0;
  if capx <= .Z then capx = 0; 
  if fyme >0 and dltt >= 0 then capital = log(fyme+dltt);
  if fyme < 0 then capital = log(fyme2+dltt);

  if pstkl >= 0 then prf = pstkl; else if pstkrv >= 0 then prf = pstkrv; else if pstk >= 0 then prf = pstk;
  if splticrm not in ('', 'N.M.', 'SD', 'Suspended') then rating = 1; else rating = 0;
  if epspx not in (0, .) then pe = prcc_f/epspx;
  if txditc <= .Z then txditc = 0;
  debt = lt+prf-txditc-dcvt;
  debt2 = dltt+dlc;
  ebitda_ta = oibdp/at;
  mv = lt-txditc+prf+fyme;
  if act <= .Z then act = 0;
  if lct <= .Z then lct = 0;
  liquidity = (act-lct)/at;
  de = dltt/ceq;
  mb = fyme/ceq;
  bm = ceq/fyme;
  q1 = (at+fyme-ceq)/at;
  q2= (at+fyme2-ceq)/at;
  q3= (lt-txditc+prf+fyme)/at;
  q4= (lt-txditc+prf+fyme2)/at;
  q5 = (at+mve_crsp-ceq-txdb)/at;
  acquisition = aqc/at;
  tangible = ppent/at;
  capex = capx/at;
  size = log(at);
  sales = log(1+sale);
  be = at-lt+txditc-prf;
  leverage_b = (at-be)/at; 
  leverage_m = (at-be)/mv;
  leverage_bnet = (at-be-che)/at; 
  leverage_mnet = (at-be-che)/mv;
  yield = dvc/fyme;
  yield2 = dvc/fyme2;

  if dvc > 0 then div = 1; else div = 0;
  cashflow_at = (ib+dp)/at;
  cashflow = ib+dp;
  if xint <= .Z then xint = 0;
  freecashflow_at = (oibdp-xint-txt-capx)/at;
  *	IB - Income Before Extraordinary Items;
  * DP - Depreciation and Amortization;
  if xrd > .Z then do; 
    rnd = xrd/at; rnd_sale = xrd/sale; rnd_dummy = 0; end;
    else do; 
      rnd = 0; rnd_sale = 0; rnd_dummy = 1; end;
  invest = capex+rnd;
  if oibdp = . and oiadp+dpc ne . then oibdp = oiadp+dpc;
  roa = ib/at;
  if num_geoseg in (., 0) then num_geoseg = 1;
  if num_busseg in (., 0) then num_busseg = 1;
  if xad > .Z then do; 
    adv = xad/at; adv_sale = xad/sale; adv_dummy = 0; end;
    else do; 
      adv = 0; adv_sale =0; adv_dummy = 1; end;
  logme = log(fyme);
  logme2 = log(fyme2);
  if fyme > 0 and dltt >= 0 then capital = log(fyme+dltt);
  if capital = . then capital = logme2;

proc sql;
  create table cst_annual2d as
  select a.*, b.roa as roa_post, b.fyrenddt as fyrenddt_post, abs(intck('month', b.fyrenddt, a.fyrenddt)+12) as diff2
  from cst_annual2d as a left join cst_annual2d as b
  on a.gvkey = b.gvkey and a.permco = b.permco and a.permno = b.permno and a.fyear = b.fyear-1;
run;

proc sort data = cst_annual2d nodupkey; by gvkey permco datadate permno diff2; 

data cst_annual2d; set cst_annual2d; by gvkey permco datadate permno diff2; 
  if first.permno;

proc sort data = cst_annual2d nodupkey; by gvkey permno datadate;
run;



************************************************************************************************;
********************************** Calculate prior stock returns *******************************;
************************************************************************************************;
proc sort data = cst_annual2d (keep=permno datadate) out = ret nodupkey; by permno datadate;

proc sql;
  create table ret1 as
  select a.*, b.ret, b.date as datem
  from ret as a, crsp.msf as b
  where a.permno = b.permno and 0 <= intck('month', b.date, a.datadate) <= 35;
quit;

proc sql;
  create table ret2
  as select a.*, b.vwretd, a.ret-b.vwretd as xret
  from ret1 as a, crsp.msi as b
  where a.datem = b.date;
quit;

proc sort data = ret2; by permno descending datadate;

proc means data = ret2 noprint; by permno descending datadate;
  var ret xret vwretd; 
  output out = xret3y (drop=_type_ _freq_) sum = ret3y xret3y mret3y std(ret) = vol3y std(xret) = ivol3y n(xret) = num_months36;

proc means data = ret2 noprint; by permno descending datadate;
  var ret xret vwretd; where intck('month', datem, datadate) <= 23;
  output out = xret2y (drop=_type_ _freq_) sum = ret2y xret2y mret2y std(ret) = vol2y std(xret) = ivol2y n(xret) = num_months24;

proc means data = ret2 noprint; by permno descending datadate;
  var ret xret vwretd; where intck('month', datem, datadate) <= 11;
  output out = xret1y (drop=_type_ _freq_) sum = ret1y xret1y mret1y std(ret) = vol1y std(xret) = ivol1y n(xret) = num_months12;

data temp_cst_return; merge xret3y xret1y xret2y; by permno descending datadate;
run;

proc sql;
  create table cst_annual2x as
  select a.*, b.*
  from cst_annual2d as a left join temp_cst_return as b
  on a.permno = b.permno and a.datadate = b.datadate;
quit;



************************************************************************************************;
***********************************   ANALYSTS  ************************************************;
************************************************************************************************;
proc sql;
  create table cst_link as
  select a.gvkey, a.permco, a.permno, a.datadate, b.ticker, b.score
  from cst_annual2d as a left join ibes.iclink as b
  on a.permno = b.permno and b.score in (0,1,2);
quit;

proc sort data = cst_link; by gvkey permco permno datadate score;

data temp_cst_link; set cst_link; by gvkey permco permno datadate score;
  if first.datadate;
run;

* Using detail file;
proc sql;
  create table analysts as
  select a.*, b.usfirm, b.cusip, value, anndats, fpedats, estimator, analys
  from temp_cst_link as a left join ibes.Detu_epsus as b
  on a.ticker = b.ticker and 0 <= intck('month', b.fpedats, a.datadate) <= 11 and FPI='1' and measure="EPS" 
    and b.usfirm = 1 and 1 <= intck('day', b.anndats, b.fpedats) <= 365;
quit;

proc sort data = analysts; by gvkey permco datadate permno fpedats estimator analys anndats; where value ne .;

data analysts; set analysts; by gvkey permco datadate permno fpedats estimator analys anndats; 
  if last.analys;
  if weekday(anndats) = 1 then anndats2 = anndats-2; else
  if weekday(anndats) = 7 then anndats2 = anndats-1; else
  anndats2 = anndats;

  if weekday(fpedats) = 1 then fpedats2 = fpedats-2; else
  if weekday(fpedats) = 7 then fpedats2 = fpedats-1; else
  fpedats2 = fpedats;

* get CRSP Cumulative adjustment factor as of report date of the estimate;
proc sql;
  create table analysts1 as
  select a.*, b.date as permno_date_est, b.cfacshr as crspadj_est, b.prc as prc_est
  from analysts as a left join crsp.dsf (keep=permno cfacshr date prc) as b
  on a.permno = b.permno and a.anndats2 = b.date;
quit;

proc sql;
  create table analysts2 as
  select a.*, b.date as permno_date_rep, b.cfacshr as crspadj_rep, b.prc as prc_rep
  from analysts1 as a left join crsp.dsf (keep=permno cfacshr date prc) as b
  on a.permno = b.permno and a.fpedats2 = b.date;
quit;

data analysts2; set analysts2; 
  value_adj = value/(crspadj_est/crspadj_rep);

proc sort data = analysts2; by gvkey permco datadate permno fpedats;

proc means data = analysts2 noprint; by gvkey permco datadate permno fpedats;
  var value value_adj;
  output out = analysts2 n(value_adj) = nanalyst std(value) = dispersionu std(value_adj) = dispersion;

data temp_analysts; set analysts2; by gvkey permco datadate permno fpedats;
  if last.permno;
run;

************************************************************************************************;
*********************************** END OF ANALYSTS  *******************************************;
************************************************************************************************;


proc sql;
  create table cst_annual2x as
  select a.*, b.nanalyst, b.dispersion
  from cst_annual2x as a left join temp_analysts as b
  on a.permno = b.permno and a.datadate = b.datadate;
quit;

data cst_annual2x; set cst_annual2x;
  if nanalyst = . and fyear > 1985 then nanalyst = 0;

proc sort data = crsp.msf (keep=permno date) out = msf; by permno date;

data firm_age; set msf; by permno date;
  if first.permno;
  first_date_crsp = date;
  drop date;
  format first_date_crsp date9.;
run;

proc sort data = cst_annual2x; by permno;

data cst_annual2b; merge cst_annual2x (in=a) firm_age; by permno; if a;
  firmage = intck('year', first_date_crsp, datadate);
  if shrcd in (10,11,12) and exchcd in (1, 2, 3);
run;


************************************************************************************************;
***********************************   Stock Liquidity  *****************************************;
************************************************************************************************;
proc sql;
  create table illiq as
  select a.permno, a.datadate, b.date, b.ret, b.vol, b.prc, intnx('month', b.date, 0, 'end') as mend
  from cst_annual2b as a, crsp.dsf as b
  where a.permno = b.permno and 0 <= intck('month', b.date, a.datadate) <= 11;
quit;

proc sort data = illiq nodupkey; by permno datadate date;

data illiq2; set illiq; by permno datadate;
  dvolume = vol*abs(prc)/1000000; * Dollar volume in million $s;
  if first.datadate then do;
    sum_pi = 0; sum_pi2 = 0; ni=0; sum_pl = 0; sum_pl2 = 0; nl=0; zr = 0;
  end;
  if dvolume > 0 then do;
    pi2 = abs(ret)/dvolume; pi = sqrt(pi2);
	sum_pi2+pi2; sum_pi+pi; ni+1;
  end;
  if abs(ret) >= 0 then do;
    if ret = 0 then do; 
	  zr+1;
      if year(date) <= 2000 then ret = .001;
	  else if year(date) > 2000 then ret = .0001;
	end;
    pl2 = dvolume/abs(ret); pl = sqrt(pl2);
	sum_pl2+pl2; sum_pl+pl; nl+1;
  end;
  if last.datadate then do; 
    illiq = sum_pi/ni; illiq2 = sum_pi2/ni; liq = sum_pl/nl; liq2 = sum_pl2/nl; zrindex = zr/nl; output;
  end;
  keep permno datadate illiq illiq2 liq liq2 zrindex; run;

proc sort data = illiq2; by descending illiq; run;

proc means data = illiq2 n mean median p1 p5 p10 p90 p95 p99 max; 
  var illiq2 illiq;

%winsor(dsetin=illiq2, dsetout=temp_illiq, byvar=none, vars=illiq2 illiq, type=winsor, pctl=.5 99.5);
run;

proc sort data = temp_illiq; by permno datadate;

proc sort data = cst_annual2b; by permno datadate;

data cst_annual2c; merge cst_annual2b (in=a) temp_illiq; by permno datadate; if a;
run;



/***** LINK TO mutual fund holdings;*/
proc sql;
  create table mfo as
  select a.permno, a.datadate, b.wficn, b.wt_adj as wt, b.rdate, b.fdate
  from cst_annual2c as a left join temp.s12_holdings as b
  on a.permno = b.permno and 0 <= intck('month', b.rdate, a.datadate) <= 6;
quit;* temp.s12_holdings is from 02Holdings.sas;

proc sort data = mfo; by permno datadate wficn descending rdate fdate; 

data mfo; set mfo; by permno datadate wficn descending rdate; 
  if first.wficn;
run;

data mfo1a; set mfo;
  if wt ne .; 

data mfo1b; set mfo;
  if wt = .; 

proc sql;
  create table mfo1b as
  select a.permno, a.datadate, b.crsp_cl_grp, b.wt, b.report_dt as rdate
  from mfo1b as a, temp.holdings as b
  where a.permno = b.permno and 0 <= intck('month', b.report_dt, a.datadate) <= 6;
quit;run; * temp.holdings is from 02Holdings.sas;

proc sort data = mfo1b; by permno datadate crsp_cl_grp descending rdate; 

data mfo1b; set mfo1b; by permno datadate crsp_cl_grp descending rdate; 
  if first.crsp_cl_grp;

data mfo2; set mfo1a mfo1b;
  if wt > 0;

proc sort; by permno datadate;

proc means noprint; by permno datadate;
  var wt;
  output out = temp.mfo_annual (drop=_type_ _freq_) sum = mfo_all;
run;

proc sort data=temp.cst_annual; by permno datadate;

data temp.cst_annual; merge cst_annual2c temp.mfo_annual; by permno datadate;
  if mfo_all = . then mfo_all = 0;
run;


*** STEP 2: Computes SUE at the firm-fyear level;
%include 'D:\Dropbox\InformedVoting\SAS Code\ibes_sample.sas';

* variables to extract from IBES;
%let begindate='01jan2003'd; 				*start calendar date of fiscal period end;
%let enddate='30jun2019'd; 					*end calendar date of fiscal period end;
%let ibes_vars= ticker value fpedats anndats revdats measure fpi estimator analys pdf usfirm;
%let comp_list= gvkey fyear conm datadate sale at prcc_f csho fyr consol indfmt datafmt popsrc;

* timing and primary filters for Compustat Xpressfeed;
%let comp_where=where=(fyr>0 and (sale>=0 or at>0) and consol='C' and popsrc='D' and indfmt='INDL' and datafmt='STD' and missing(datadate)=0);

* IBES filters;
%let ibes_where1=where=(measure='EPS' and fpi in ('1') and &begindate<=fpedats<=&enddate); 
%let ibes_where2=where=(missing(repdats)=0 and missing(anndats)=0 and anndats < repdats and 0 < intck('day',anndats,repdats)<=365);

proc sort data = crsp.msf (keep=permno date) out = permno_list nodupkey; by permno date; 

** READ in tickers;
proc sql;
  create table cst_link as
  select a.permno, a.date, b.ticker, b.score
  from permno_list as a left join ibes.iclink as b
  on a.permno = b.permno and b.score in (0,1,2); 
quit;

proc sort data = cst_link; by permno date score;

data cst_link; set cst_link; by permno date;
  if first.date;

proc sql;
   create table cst_link
   as select a.*, b.gvkey, b.lpermco as permco, b.linktype, b.linkprim, b.liid
   from cst_link as a left join crspcc.ccmxpf_linktable as b
   on a.permno=b.lpermno and linktype in ("LC", "LN", "LU", "LX", "LD", "LS") and ((b.linkdt<=a.date <=b.linkenddt) or
   (b.linkdt<=a.date and b.linkenddt=.E) or (b.linkdt=.B and a.date <=b.linkenddt)) and b.usedflag=1;
quit;

proc sort data = cst_link; by permno permco date linktype descending linkprim liid;

data cst_link; set cst_link; by permno permco date linktype descending linkprim liid;
  if first.date;
  drop linktype linkprim liid;
run;
run;

proc sort data = cst_link out = tickers (keep=ticker) nodupkey; by ticker; run;

%IBES_SAMPLE(infile=tickers, ibes1_where=&ibes_where1, ibes2_where=&ibes_where2, ibes_var=&ibes_vars); 

data medest; set medest;
  if repdats >= fpedats;
run;

* COMPUSTAT EXTRACT;
proc sql;
   create table comp (drop=consol indfmt datafmt popsrc)
   as select *, prcc_f*csho as mcap
   from comp.funda (keep=&comp_list &comp_where) as a, cst_link as b
   where a.gvkey=b.gvkey and intck('month', a.datadate, b.date) = 0;

   create table comp
   as select *
   from comp a left join (select distinct gvkey,ibtic from comp.security (where=(missing(ibtic)=0))) b
   on a.gvkey=b.gvkey;
quit;

* Create calendar date of fiscal period end in Compustat extract;
data comp; set comp;
   if &begindate<=datadate<=&enddate;
run;

* a) Link Gvkey with Lpermno;
proc sql;
create table comp1
   as select a.*, abs(b.prc) as price_qend
   from comp as a left join crsp.msf as b
   on a.permno = b.permno and intck('month', a.datadate, b.date) = 0;
quit;

* c) Link IBES analysts' expectations (MEDEST), IBES report dates (repdats)
* and actuals (act) with Compustat data;
proc sql;
create table comp1
   as select a.*, b.fpi, b.medest, b.meanest, b.numest, b.repdats, b.act, b.price, b.basis, b.dispersion, b.dispersion_scaled
   from comp1 as a left join medest as b
   on a.ticker=b.ticker and year(a.datadate)*100+month(a.datadate)=year(b.fpedats)*100+month(b.fpedats);
quit;

* remove fully duplicate records and pre-sort;
proc sort data = comp1 noduprec; by _all_;run;

proc sort data = comp1; by gvkey fyear;
run;

data comp2; set comp1;
  sue = (act-medest)/prcc_f;

proc sort data=comp2 nodupkey; by permno datadate;

%winsor(dsetin=comp2, dsetout=temp.sue_annual, byvar=fyear, vars=sue, type=winsor, pctl=2.5 97.5); 
run;
